# The IQR is the length of the box in the box-and-whisker plot for a given 
# course and distance. An outlier is any value that lies more than one and a 
# half times the length of the box from either end of the box. That is, if a 
# data point is below Q1 – 1.5×IQR or above Q3 + 1.5×IQR, it is viewed as being 
# too far from the central values to be reasonable.

library(plyr)

# For every course/distance combination, compute the ends of the boxplot
# whiskers for the winning times (the most extreme observations that are still
# within 1.5 x IQR of the box, boxplot.stats()'s default) together with the
# number of non-missing winning times in that group.
stats <- ddply(
  .data = races,
  .variables = .(course, distance_yards),
  .fun = function(grp) {
    bp <- boxplot.stats(grp$winning_time_secs)
    whiskers <- bp$stats[c(1, 5)]
    c(lo = whiskers[1], hi = whiskers[2], n = bp$n)
  }
)

# Attach each group's limits (lo, hi) and count (n) to its individual races.
races <- merge(x = races, y = stats, by = c("course", "distance_yards"))

# Exclude outliers and courses and distances with fewer than 20 races.
# The conditions are combined with the element-wise `&`: the scalar `&&` would
# only look at the first row (and is an error for vectors from R 4.3.0), so it
# cannot be used to build a row-by-row filter.
races <- subset(
  races,
  winning_time_secs >= lo & winning_time_secs <= hi & n >= 20
)

# Drop the helper whisker limits; the per-group count `n` is kept on `races`.
races <- subset(races, select = -c(lo, hi))
rm(stats)